This code produces the results for the question, “Do features of caregiver verbal engagement vary as a function of language group and activity?”, but conducts the analyses separately for each language and with separate levels for all other-child-centered activities (see pre-registration here: https://osf.io/byjfg/).

Load libraries and set theme

library(tidyverse)
library(ggpubr)
library(psych)
library(lme4)
library(lmerTest)
library(emmeans)
library(sjPlot)
library(ggeffects)

theme_set(theme_bw())

# Extract the legend grob from a ggplot object.
#
# myggplot: a ggplot object that draws a legend.
# Returns the legend grob taken from the rendered plot table.
# Stops with an informative error when the plot contains no legend
# (the original failed with an opaque subscript error in that case).
get_legend <- function(myggplot) {
  plot_table <- ggplot_gtable(ggplot_build(myggplot))
  grob_names <- vapply(
    plot_table$grobs,
    function(grob) as.character(grob$name)[1],
    character(1)
  )
  # ggplot2 < 3.5.0 names the legend grob "guide-box"; ggplot2 >= 3.5.0 creates
  # one grob per legend position ("guide-box-right", "guide-box-bottom", ...)
  # and fills the unused positions with empty zeroGrobs, so match on the prefix
  # and skip the empty placeholders.
  is_legend <- !is.na(grob_names) & startsWith(grob_names, "guide-box")
  is_empty <- vapply(plot_table$grobs, inherits, logical(1), what = "zeroGrob")
  legend_idx <- which(is_legend & !is_empty)
  if (length(legend_idx) == 0) {
    stop("No legend found in plot (is legend.position set to \"none\"?)",
         call. = FALSE)
  }
  plot_table$grobs[[legend_idx[1]]]
}

Read in data and convert

# demographics
# Each language group stores the same three measures under different column
# names; keep only those columns under shared names (id, hi, momed), store id
# as character, and tag every row with its language group.
demo_english <- read_csv("./data_demo_lena_transcripts/demo_english_ms.csv") %>%
  dplyr::select(id = ID, hi = HI24Final, momed = Momed) %>%
  mutate(
    id = as.character(id),
    language = "english"
  )

demo_spanish <- read_csv("./data_demo_lena_transcripts/demo_spanish_ms.csv") %>%
  dplyr::select(id = ID, hi = HI_18, momed = MomEd_25m) %>%
  mutate(
    id = as.character(id),
    language = "spanish"
  )


# NOTE about periods of non-tCDS
# gemods refers to designated start/end periods of other-directed speech (ODS); these were captured using gems (@G) following CHAT conventions
# kwalods refers to ODS that was transcribed at the utterance level within a tCDS activity period between caregiver and child (e.g., other-directed speech in the background); this was captured per utterance using CHAT postcodes
## for tokens/min and types/min, we do not include ODS that occurred within a period of tCDS, because durations were captured by activity and not by utterance
## for mlu, we include all ODS across gemods and kwalods


# NOTE about speech == "all"
# "speech" includes two levels: all, spont
# all = refers to all speech by caregivers
# spont = refers to only speech by caregivers that was considered spontaneous rather than recited (e.g., reading book text, singing memorized common songs like itsy bitsy spider); therefore, 'spont' is a subset of 'all'


# freq
# Keep rows for all speech (speech == "all") and drop utterance-level ODS
# (kwalods); the remaining ODS periods (gemods) are relabelled "non_tcds".
# id, language and activity become factors, with activity in a fixed order.
freq <- read_csv("./data_demo_lena_transcripts/freq.csv") %>%
  filter(activity != "kwalods" & speech == "all") %>%
  mutate(
    activity = factor(
      if_else(activity == "gemods", "non_tcds", activity),
      levels = c("books", "play", "food", "routines", "conv", "ac", "non_tcds")
    ),
    language = factor(language),
    id = factor(id)
  )


# mlu
# Keep rows for all speech (speech == "all"); ODS rows ("ods") are relabelled
# "non_tcds". id, language and activity become factors, with activity in the
# same fixed order used for the frequency data.
mlu <- read_csv("./data_demo_lena_transcripts/mlu.csv") %>%
  filter(speech == "all") %>%
  mutate(
    activity = factor(
      if_else(activity == "ods", "non_tcds", activity),
      levels = c("books", "play", "food", "routines", "conv", "ac", "non_tcds")
    ),
    language = factor(language),
    id = factor(id)
  )


# chip
# This file contains caregivers only, so it has no speaker column.
# ODS periods are excluded because these measures are about responsiveness to
# the child during periods of tCDS.
# The shared 7-level activity coding is kept, so "non_tcds" remains a factor
# level here (presumably with no rows once "ods" is filtered out - confirm).
chip <- read_csv("./data_demo_lena_transcripts/chip.csv")
chip <- chip %>%
  filter(activity != "ods") %>%
  mutate(
    activity = factor(
      activity,
      levels = c("books", "play", "food", "routines", "conv", "ac", "non_tcds")
    ),
    language = factor(language),
    id = factor(id)
  )
  

# inspect the structure (column types and factor levels) of the frequency data
str(freq)
## spec_tbl_df[,13] [3,308 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ X1           : num [1:3308] 1 2 3 4 5 6 7 8 9 10 ...
##  $ id           : Factor w/ 90 levels "7292","7352",..: 47 47 47 47 50 50 52 52 52 52 ...
##  $ rectime      : num [1:3308] 11923 11923 31360 31360 21499 ...
##  $ activity     : Factor w/ 7 levels "books","play",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ speaker      : chr [1:3308] "CHI" "ADULTS" "CHI" "ADULTS" ...
##  $ tokens       : num [1:3308] 30 151 35 143 58 588 42 286 33 152 ...
##  $ types        : num [1:3308] 17 70 17 65 17 199 19 53 17 59 ...
##  $ segment_num  : num [1:3308] 12 12 15 15 2 2 11 11 5 5 ...
##  $ language     : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
##  $ speech       : chr [1:3308] "all" "all" "all" "all" ...
##  $ dur_min      : num [1:3308] 3.55 3.55 6.57 6.57 4.71 ...
##  $ tokens_permin: num [1:3308] 8.46 42.57 5.32 21.75 12.31 ...
##  $ types_permin : num [1:3308] 4.79 19.73 2.59 9.89 3.61 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   X1 = col_double(),
##   ..   id = col_double(),
##   ..   rectime = col_double(),
##   ..   activity = col_character(),
##   ..   speaker = col_character(),
##   ..   tokens = col_double(),
##   ..   types = col_double(),
##   ..   segment_num = col_double(),
##   ..   language = col_character(),
##   ..   speech = col_character(),
##   ..   dur_min = col_double(),
##   ..   tokens_permin = col_double(),
##   ..   types_permin = col_double()
##   .. )
# inspect the structure (column types and factor levels) of the MLU data
str(mlu)
## spec_tbl_df[,9] [3,002 × 9] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ id         : Factor w/ 90 levels "7292","7352",..: 46 46 46 46 46 46 46 46 46 46 ...
##  $ activity   : Factor w/ 7 levels "books","play",..: 6 6 5 5 7 7 2 2 6 6 ...
##  $ speaker    : chr [1:3002] "ADULTS" "CHI" "ADULTS" "CHI" ...
##  $ segment_num: num [1:3002] 2 2 2 2 2 2 2 2 3 3 ...
##  $ words_sum  : num [1:3002] 210 66 175 43 11 16 189 47 261 78 ...
##  $ num_utt_sum: num [1:3002] 66 35 64 24 2 12 64 28 87 43 ...
##  $ mlu_w      : num [1:3002] 3.18 1.89 2.73 1.79 5.5 ...
##  $ language   : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
##  $ speech     : chr [1:3002] "all" "all" "all" "all" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   id = col_double(),
##   ..   activity = col_character(),
##   ..   speaker = col_character(),
##   ..   segment_num = col_double(),
##   ..   words_sum = col_double(),
##   ..   num_utt_sum = col_double(),
##   ..   mlu_w = col_double(),
##   ..   language = col_character(),
##   ..   speech = col_character()
##   .. )
# inspect the structure (column types and factor levels) of the chip data
str(chip)
## spec_tbl_df[,11] [1,118 × 11] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ activity                         : Factor w/ 7 levels "books","play",..: 6 5 2 6 5 4 6 5 4 2 ...
##  $ id                               : Factor w/ 90 levels "7292","7352",..: 46 46 46 46 46 46 46 46 46 46 ...
##  $ rectime                          : num [1:1118] 15242 15242 15242 14342 14342 ...
##  $ total_adult_utt                  : num [1:1118] 68 64 65 91 43 13 50 8 65 127 ...
##  $ total_child_utt                  : num [1:1118] 46 34 33 54 17 3 14 1 29 49 ...
##  $ total_adult_resp                 : num [1:1118] 62 51 54 77 24 9 30 4 56 106 ...
##  $ total_adult_imitexp              : num [1:1118] 18 13 15 25 5 2 9 0 16 21 ...
##  $ prop_adultresp_outof_childutt    : num [1:1118] 1.35 1.5 1.64 1.43 1.41 ...
##  $ prop_adult_imitexp_outof_childutt: num [1:1118] 0.391 0.382 0.455 0.463 0.294 ...
##  $ language                         : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
##  $ segment_num                      : num [1:1118] 2 2 2 3 3 3 4 4 4 5 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   activity = col_character(),
##   ..   id = col_double(),
##   ..   rectime = col_double(),
##   ..   total_adult_utt = col_double(),
##   ..   total_child_utt = col_double(),
##   ..   total_adult_resp = col_double(),
##   ..   total_adult_imitexp = col_double(),
##   ..   prop_adultresp_outof_childutt = col_double(),
##   ..   prop_adult_imitexp_outof_childutt = col_double(),
##   ..   language = col_character(),
##   ..   segment_num = col_double()
##   .. )

Create dfs for ADULTS and CHI

# FREQ: caregiver (ADULTS) rows only, one data frame per language
freq_adult_en <- filter(freq, speaker == "ADULTS", language == "english")
freq_adult_sp <- filter(freq, speaker == "ADULTS", language == "spanish")


# MLU: caregiver (ADULTS) rows only, one data frame per language
mlu_adult_en <- filter(mlu, speaker == "ADULTS", language == "english")
mlu_adult_sp <- filter(mlu, speaker == "ADULTS", language == "spanish")

Prep data for mixed models

Create tokens rate per hour - Children

# Child token rate per hour: for each child (id), total child tokens divided by
# total duration in hours, pooled over all of that child's rows with
# speech == "all". The result has one row per id/language combination.
#
# The unnamed row-index column of freq.csv is no longer dropped by name: it is
# never used in this pipeline (distinct() keeps only id, language and
# tokens_hr_child), and its name depends on the readr version (`X1` in
# readr 1.x, `...1` in readr >= 2.0), so `select(-X1)` fails on newer installs.
#
# NOTE(review): unlike `freq`, rows with activity == "kwalods" are not excluded
# here - confirm CHI has no such rows, or that counting their durations is
# intended.
freq_hr_child <- read_csv("./data_demo_lena_transcripts/freq.csv") %>% 
  filter(speech == "all", 
         speaker == "CHI") %>% 
  group_by(id) %>% 
  # dur_min is in minutes, so sum(dur_min) / 60 is the child's total hours
  mutate(tokens_hr_child = sum(tokens) / (sum(dur_min) / 60)) %>% 
  distinct(id, language, tokens_hr_child) %>% 
  ungroup() %>% 
  # character ids so the table joins cleanly with the demographics tables
  mutate(id = as.character(id))

# split the child token rates by language group
freq_hr_child_en <- filter(freq_hr_child, language == "english")
freq_hr_child_sp <- filter(freq_hr_child, language == "spanish")

Merge freq_adult, child tokens per hour, and demographic info

# Attach the child token rate and the demographics to a caregiver-level table.
# `id` is converted to character first because both lookup tables store it as
# character; rows are matched on id and language, keeping every caregiver row.
add_child_covariates <- function(adult_df, child_rate_df, demo_df) {
  join_keys <- c("id", "language")
  adult_df <- mutate(adult_df, id = as.character(id))
  with_rate <- left_join(adult_df, child_rate_df, by = join_keys)
  left_join(with_rate, demo_df, by = join_keys)
}

# freq
freq_all_mm_en <- add_child_covariates(freq_adult_en, freq_hr_child_en, demo_english)
freq_all_mm_sp <- add_child_covariates(freq_adult_sp, freq_hr_child_sp, demo_spanish)



# mlu
mlu_all_mm_en <- add_child_covariates(mlu_adult_en, freq_hr_child_en, demo_english)
mlu_all_mm_sp <- add_child_covariates(mlu_adult_sp, freq_hr_child_sp, demo_spanish)



# chip (caregiver-only file, so it is split by language here)
chip_mm_en <- add_child_covariates(
  filter(chip, language == "english"),
  freq_hr_child_en,
  demo_english
)
chip_mm_sp <- add_child_covariates(
  filter(chip, language == "spanish"),
  freq_hr_child_sp,
  demo_spanish
)

Mixed models - 7 categories

TOKENS x ACTIVITY - ENGLISH - ALL TALK

# comparing models
# Both models are fit with maximum likelihood (REML = FALSE) so that models
# differing in their fixed effects can be compared with a likelihood-ratio test.
# m1: child token rate as covariate, random intercept per id
m1_english_tokens_all <- lmer(tokens_permin ~ tokens_hr_child +
                                (1 | id),
                              data = freq_all_mm_en, REML = FALSE)

# m2: m1 plus the fixed effect of activity
m2_english_tokens_all <- lmer(tokens_permin ~ tokens_hr_child + activity +
                                (1 | id),
                              data = freq_all_mm_en, REML = FALSE)

# does adding the main effect of activity improve fit?
# (neither model contains an interaction term)
anova(m1_english_tokens_all, m2_english_tokens_all)
## Data: freq_all_mm_en
## Models:
## m1_english_tokens_all: tokens_permin ~ tokens_hr_child + (1 | id)
## m2_english_tokens_all: tokens_permin ~ tokens_hr_child + activity + (1 | id)
##                       npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## m1_english_tokens_all    4 8952.9 8971.9 -4472.4   8944.9                         
## m2_english_tokens_all   10 8796.7 8844.3 -4388.4   8776.7 168.17  6  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# anova table for final model
# (fixed-effect F-tests; the rendered output reports Type III with Satterthwaite's method)
anova(m2_english_tokens_all)
## Type III Analysis of Variance Table with Satterthwaite's method
##                 Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## tokens_hr_child  18477   18477     1  45.03  12.703 0.0008779 ***
## activity        270623   45104     6 839.48  31.008 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# estimated marginal means (EMMs) for each activity level of the final model
m2_english_tokens_all_emmeans_activity <- emmeans(
  object = m2_english_tokens_all,
  specs = ~ activity
)

# print the EMM table (estimate, SE, df and confidence limits)
m2_english_tokens_all_emmeans_activity
##  activity emmean   SE  df lower.CL upper.CL
##  books      92.5 6.10 678     80.5    104.4
##  play       62.3 4.33 369     53.8     70.8
##  food       60.6 5.41 594     49.9     71.2
##  routines   70.1 5.35 592     59.6     80.6
##  conv       74.6 3.88 280     67.0     82.3
##  ac         76.0 3.51 203     69.0     82.9
##  non_tcds   36.9 3.08 129     30.8     43.0
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
# all pairwise contrasts between the activity EMMs (Tukey-adjusted p-values, per the output)
pairs(m2_english_tokens_all_emmeans_activity)
##  contrast            estimate   SE  df t.ratio p.value
##  books - play           30.14 6.95 860  4.337  0.0003 
##  books - food           31.91 7.63 854  4.184  0.0006 
##  books - routines       22.34 7.60 854  2.939  0.0525 
##  books - conv           17.82 6.63 851  2.690  0.1021 
##  books - ac             16.51 6.48 860  2.548  0.1438 
##  books - non_tcds       55.53 6.21 854  8.941  <.0001 
##  play - food             1.77 6.37 858  0.278  1.0000 
##  play - routines        -7.80 6.31 856 -1.236  0.8802 
##  play - conv           -12.32 5.07 848 -2.431  0.1870 
##  play - ac             -13.63 4.80 851 -2.837  0.0695 
##  play - non_tcds        25.39 4.48 846  5.663  <.0001 
##  food - routines        -9.57 7.06 850 -1.355  0.8252 
##  food - conv           -14.09 6.02 847 -2.342  0.2254 
##  food - ac             -15.40 5.80 849 -2.656  0.1109 
##  food - non_tcds        23.62 5.54 847  4.264  0.0004 
##  routines - conv        -4.52 5.98 847 -0.756  0.9889 
##  routines - ac          -5.83 5.72 842 -1.018  0.9499 
##  routines - non_tcds    33.20 5.48 842  6.058  <.0001 
##  conv - ac              -1.31 4.39 841 -0.298  0.9999 
##  conv - non_tcds        37.71 4.06 836  9.299  <.0001 
##  ac - non_tcds          39.02 3.71 832 10.531  <.0001 
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 7 estimates
# plot: EMM (point) with its confidence limits (range) for each activity
tokens_emmeans_en <- data.frame(
  emmeans(object = m2_english_tokens_all, specs = ~ activity)
)

tokens_en <- ggplot(
  data = tokens_emmeans_en,
  mapping = aes(x = activity, y = emmean, colour = activity)
) +
  geom_pointrange(
    mapping = aes(ymin = lower.CL, ymax = upper.CL),
    position = position_dodge(width = 0.2),
    size = 1
  ) +
  scale_x_discrete(
    labels = c("Books", "Play", "Food", "Routines",
               "Unst. Conv.", "Adult-Cent", "non-tCDS")
  ) +
  scale_color_manual(
    name = "Activity",
    values = c("darkviolet", "firebrick1", "green2", "dodgerblue1",
               "darkgoldenrod1", "darkgrey", "black"),
    labels = c("Books", "Play", "Food", "Routines",
               "Unst. Conv.", "Adult-Cent", "non-tCDS")
  ) +
  labs(title = "English - Tokens (rate per min)", x = "", y = "EMM") +
  theme(
    # the colour legend is hidden; activities are labelled on the x-axis
    legend.position = "none",
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 40, vjust = .9, hjust = .9)
  )

tokens_en

# model diagnostics
# NOTE(review): the original note said these plots only take fixed effects into
# account; the rendered output has a `[[2]]$id` element, which looks like a
# random-effects panel - confirm against the sjPlot documentation
plot_model(m2_english_tokens_all, type = "diag")
## [[1]]

## 
## [[2]]
## [[2]]$id

## 
## 
## [[3]]

## 
## [[4]]

TOKENS x ACTIVITY - SPANISH - ALL TALK

# comparing models
# Both models are fit with maximum likelihood (REML = FALSE) so that models
# differing in their fixed effects can be compared with a likelihood-ratio test.
# m1: child token rate as covariate, random intercept per id
m1_spanish_tokens_all <- lmer(tokens_permin ~ tokens_hr_child +
                                (1 | id),
                              data = freq_all_mm_sp, REML = FALSE)

# m2: m1 plus the fixed effect of activity
m2_spanish_tokens_all <- lmer(tokens_permin ~ tokens_hr_child + activity +
                                (1 | id),
                              data = freq_all_mm_sp, REML = FALSE)

# does adding the main effect of activity improve fit?
# (neither model contains an interaction term)
anova(m1_spanish_tokens_all, m2_spanish_tokens_all)
## Data: freq_all_mm_sp
## Models:
## m1_spanish_tokens_all: tokens_permin ~ tokens_hr_child + (1 | id)
## m2_spanish_tokens_all: tokens_permin ~ tokens_hr_child + activity + (1 | id)
##                       npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## m1_spanish_tokens_all    4 7949.3 7968.0 -3970.6   7941.3                         
## m2_spanish_tokens_all   10 7851.4 7898.2 -3915.7   7831.4 109.83  6  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# anova table for final model
# (fixed-effect F-tests; the rendered output reports Type III with Satterthwaite's method)
anova(m2_spanish_tokens_all)
## Type III Analysis of Variance Table with Satterthwaite's method
##                 Sum Sq Mean Sq NumDF  DenDF F value  Pr(>F)    
## tokens_hr_child   4435  4434.8     1  43.75  4.1682 0.04725 *  
## activity        125494 20915.7     6 763.78 19.6587 < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# estimated marginal means (EMMs) for each activity level of the final model
m2_spanish_tokens_all_emmeans_activity <- emmeans(
  object = m2_spanish_tokens_all,
  specs = ~ activity
)

# print the EMM table (estimate, SE, df and confidence limits)
m2_spanish_tokens_all_emmeans_activity
##  activity emmean   SE  df lower.CL upper.CL
##  books      67.4 6.02 627     55.6     79.2
##  play       51.0 4.38 379     42.4     59.6
##  food       39.4 5.03 509     29.6     49.3
##  routines   59.9 4.83 477     50.4     69.3
##  conv       57.2 3.73 241     49.9     64.6
##  ac         57.2 3.25 149     50.7     63.6
##  non_tcds   31.2 2.92 101     25.4     37.0
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
# all pairwise contrasts between the activity EMMs (Tukey-adjusted p-values, per the output)
pairs(m2_spanish_tokens_all_emmeans_activity)
##  contrast            estimate   SE  df t.ratio p.value
##  books - play         16.4460 6.81 782  2.416  0.1933 
##  books - food         27.9722 7.31 787  3.826  0.0027 
##  books - routines      7.5614 7.15 784  1.058  0.9400 
##  books - conv         10.1976 6.43 785  1.585  0.6921 
##  books - ac           10.2591 6.20 790  1.654  0.6472 
##  books - non_tcds     36.2247 5.98 785  6.055  <.0001 
##  play - food          11.5261 5.98 775  1.926  0.4633 
##  play - routines      -8.8846 5.78 768 -1.536  0.7228 
##  play - conv          -6.2485 4.91 769 -1.272  0.8648 
##  play - ac            -6.1870 4.57 772 -1.355  0.8256 
##  play - non_tcds      19.7786 4.33 767  4.573  0.0001 
##  food - routines     -20.4108 6.31 772 -3.237  0.0214 
##  food - conv         -17.7746 5.50 770 -3.234  0.0216 
##  food - ac           -17.7131 5.17 770 -3.424  0.0115 
##  food - non_tcds       8.2525 4.98 770  1.657  0.6453 
##  routines - conv       2.6362 5.29 762  0.499  0.9989 
##  routines - ac         2.6976 5.00 769  0.540  0.9982 
##  routines - non_tcds  28.6632 4.78 765  6.000  <.0001 
##  conv - ac             0.0615 3.95 768  0.016  1.0000 
##  conv - non_tcds      26.0271 3.67 763  7.093  <.0001 
##  ac - non_tcds        25.9656 3.17 761  8.183  <.0001 
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 7 estimates
# plot: EMM (point) with its confidence limits (range) for each activity
tokens_emmeans_sp <- data.frame(
  emmeans(object = m2_spanish_tokens_all, specs = ~ activity)
)

tokens_sp <- ggplot(
  data = tokens_emmeans_sp,
  mapping = aes(x = activity, y = emmean, colour = activity)
) +
  geom_pointrange(
    mapping = aes(ymin = lower.CL, ymax = upper.CL),
    position = position_dodge(width = 0.2),
    size = 1
  ) +
  scale_x_discrete(
    labels = c("Books", "Play", "Food", "Routines",
               "Unst. Conv.", "Adult-Cent", "non-tCDS")
  ) +
  scale_color_manual(
    name = "Activity",
    values = c("darkviolet", "firebrick1", "green2", "dodgerblue1",
               "darkgoldenrod1", "darkgrey", "black"),
    labels = c("Books", "Play", "Food", "Routines",
               "Unst. Conv.", "Adult-Cent", "non-tCDS")
  ) +
  labs(title = "Spanish - Tokens (rate per min)", x = "", y = "EMM") +
  theme(
    # the colour legend is hidden; activities are labelled on the x-axis
    legend.position = "none",
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 40, vjust = .9, hjust = .9)
  )

tokens_sp

# model diagnostics
# NOTE(review): the original note said these plots only take fixed effects into
# account; the rendered output has a `[[2]]$id` element, which looks like a
# random-effects panel - confirm against the sjPlot documentation
plot_model(m2_spanish_tokens_all, type = "diag")
## [[1]]

## 
## [[2]]
## [[2]]$id

## 
## 
## [[3]]

## 
## [[4]]

TYPES x ACTIVITY - ENGLISH - ALL TALK

# comparing models
# Both models are fit with maximum likelihood (REML = FALSE) so that models
# differing in their fixed effects can be compared with a likelihood-ratio test.
# m1: child token rate as covariate, random intercept per id
m1_english_types_all <- lmer(types_permin ~ tokens_hr_child +
                               (1 | id),
                             data = freq_all_mm_en, REML = FALSE)

# m2: m1 plus the fixed effect of activity
m2_english_types_all <- lmer(types_permin ~ tokens_hr_child + activity +
                               (1 | id),
                             data = freq_all_mm_en, REML = FALSE)

# does adding the main effect of activity improve fit?
# (neither model contains an interaction term)
anova(m1_english_types_all, m2_english_types_all)
## Data: freq_all_mm_en
## Models:
## m1_english_types_all: types_permin ~ tokens_hr_child + (1 | id)
## m2_english_types_all: types_permin ~ tokens_hr_child + activity + (1 | id)
##                      npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## m1_english_types_all    4 8516.0 8535.0 -4254.0   8508.0                         
## m2_english_types_all   10 8411.8 8459.4 -4195.9   8391.8 116.17  6  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# anova table for final model
# (fixed-effect F-tests; the rendered output reports Type III with Satterthwaite's method)
anova(m2_english_types_all)
## Type III Analysis of Variance Table with Satterthwaite's method
##                 Sum Sq Mean Sq NumDF DenDF F value    Pr(>F)    
## tokens_hr_child  10350   10350     1  44.8  10.794  0.001982 ** 
## activity        119808   19968     6 849.3  20.825 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# estimated marginal means (EMMs) for each activity level of the final model
m2_english_types_all_emmeans_activity <- emmeans(
  object = m2_english_types_all,
  specs = ~ activity
)

# print the EMM table (estimate, SE, df and confidence limits)
m2_english_types_all_emmeans_activity
##  activity emmean   SE  df lower.CL upper.CL
##  books      29.3 4.74 770     20.0     38.6
##  play       27.1 3.25 539     20.7     33.5
##  food       34.6 4.18 732     26.4     42.8
##  routines   38.9 4.13 742     30.8     47.0
##  conv       39.4 2.87 452     33.7     45.0
##  ac         52.5 2.54 338     47.5     57.5
##  non_tcds   20.6 2.15 210     16.4     24.9
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
# all pairwise contrasts between the activity EMMs (Tukey-adjusted p-values, per the output)
pairs(m2_english_types_all_emmeans_activity)
##  contrast            estimate   SE  df t.ratio p.value
##  books - play           2.213 5.59 870  0.396  0.9997 
##  books - food          -5.323 6.15 866 -0.866  0.9774 
##  books - routines      -9.617 6.12 867 -1.570  0.7015 
##  books - conv         -10.048 5.34 864 -1.881  0.4937 
##  books - ac           -23.162 5.21 870 -4.446  0.0002 
##  books - non_tcds       8.674 5.00 866  1.733  0.5937 
##  play - food           -7.536 5.12 869 -1.471  0.7621 
##  play - routines      -11.830 5.08 868 -2.327  0.2322 
##  play - conv          -12.261 4.09 861 -2.998  0.0442 
##  play - ac            -25.375 3.87 863 -6.550  <.0001 
##  play - non_tcds        6.461 3.62 858  1.784  0.5592 
##  food - routines       -4.294 5.70 863 -0.754  0.9890 
##  food - conv           -4.725 4.86 860 -0.973  0.9598 
##  food - ac            -17.839 4.68 862 -3.814  0.0028 
##  food - non_tcds       13.997 4.47 860  3.129  0.0299 
##  routines - conv       -0.431 4.83 859 -0.089  1.0000 
##  routines - ac        -13.545 4.63 853 -2.927  0.0541 
##  routines - non_tcds   18.291 4.43 854  4.130  0.0008 
##  conv - ac            -13.114 3.55 851 -3.690  0.0044 
##  conv - non_tcds       18.722 3.28 843  5.702  <.0001 
##  ac - non_tcds         31.836 3.00 838 10.603  <.0001 
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 7 estimates
# plot: EMM (point) with its confidence limits (range) for each activity
types_emmeans_en <- data.frame(
  emmeans(object = m2_english_types_all, specs = ~ activity)
)

types_en <- ggplot(
  data = types_emmeans_en,
  mapping = aes(x = activity, y = emmean, colour = activity)
) +
  geom_pointrange(
    mapping = aes(ymin = lower.CL, ymax = upper.CL),
    position = position_dodge(width = 0.2),
    size = 1
  ) +
  scale_x_discrete(
    labels = c("Books", "Play", "Food", "Routines",
               "Unst. Conv.", "Adult-Cent", "non-tCDS")
  ) +
  scale_color_manual(
    values = c("darkviolet", "firebrick1", "green2", "dodgerblue1",
               "darkgoldenrod1", "darkgrey", "black")
  ) +
  labs(title = "English - Types (rate per min)", x = "", y = "EMM") +
  theme(
    # the colour legend is hidden; activities are labelled on the x-axis
    legend.position = "none",
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 40, vjust = .9, hjust = .9)
  )

types_en

# model diagnostics
# NOTE(review): the original note said these plots only take fixed effects into
# account; the rendered output has a `[[2]]$id` element, which looks like a
# random-effects panel - confirm against the sjPlot documentation
plot_model(m2_english_types_all, type = "diag")
## [[1]]

## 
## [[2]]
## [[2]]$id

## 
## 
## [[3]]

## 
## [[4]]

TYPES x ACTIVITY - SPANISH - ALL TALK

# comparing models
# Both models are fit with maximum likelihood (REML = FALSE) so that models
# differing in their fixed effects can be compared with a likelihood-ratio test.
# m1: child token rate as covariate, random intercept per id
m1_spanish_types_all <- lmer(types_permin ~ tokens_hr_child +
                               (1 | id),
                             data = freq_all_mm_sp, REML = FALSE)

# m2: m1 plus the fixed effect of activity
m2_spanish_types_all <- lmer(types_permin ~ tokens_hr_child + activity +
                               (1 | id),
                             data = freq_all_mm_sp, REML = FALSE)

# does adding the main effect of activity improve fit?
# (neither model contains an interaction term)
anova(m1_spanish_types_all, m2_spanish_types_all)
## Data: freq_all_mm_sp
## Models:
## m1_spanish_types_all: types_permin ~ tokens_hr_child + (1 | id)
## m2_spanish_types_all: types_permin ~ tokens_hr_child + activity + (1 | id)
##                      npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## m1_spanish_types_all    4 7583.5 7602.2 -3787.7   7575.5                         
## m2_spanish_types_all   10 7530.2 7577.0 -3755.1   7510.2 65.261  6  3.816e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# anova table for final model
# (fixed-effect F-tests; the rendered output reports Type III with Satterthwaite's method)
anova(m2_spanish_types_all)
## Type III Analysis of Variance Table with Satterthwaite's method
##                 Sum Sq Mean Sq NumDF  DenDF F value   Pr(>F)    
## tokens_hr_child   2480  2480.4     1  42.34  3.3511   0.0742 .  
## activity         50490  8414.9     6 774.64 11.3691 3.34e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# estimated marginal means (EMMs) for each activity level of the final model
m2_spanish_types_all_emmeans_activity <- emmeans(
  object = m2_spanish_types_all,
  specs = ~ activity
)

# print the EMM table (estimate, SE, df and confidence limits)
m2_spanish_types_all_emmeans_activity
##  activity emmean   SE  df lower.CL upper.CL
##  books      22.9 4.74 717     13.6     32.2
##  play       21.3 3.33 608     14.8     27.9
##  food       19.4 3.91 696     11.7     27.0
##  routines   31.4 3.73 690     24.1     38.7
##  conv       30.6 2.74 452     25.2     36.0
##  ac         38.0 2.27 281     33.6     42.5
##  non_tcds   18.2 1.94 178     14.4     22.0
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
# all pairwise contrasts between the activity EMMs (Tukey-adjusted p-values, per the output)
pairs(m2_spanish_types_all_emmeans_activity)
##  contrast            estimate   SE  df t.ratio p.value
##  books - play           1.624 5.62 798  0.289  1.0000 
##  books - food           3.595 6.01 800  0.598  0.9969 
##  books - routines      -8.430 5.89 799 -1.431  0.7847 
##  books - conv          -7.672 5.30 800 -1.448  0.7753 
##  books - ac           -15.076 5.10 800 -2.959  0.0496 
##  books - non_tcds       4.745 4.93 799  0.963  0.9617 
##  play - food            1.971 4.95 791  0.398  0.9997 
##  play - routines      -10.054 4.80 782 -2.095  0.3563 
##  play - conv           -9.296 4.08 782 -2.281  0.2546 
##  play - ac            -16.700 3.78 786 -4.413  0.0002 
##  play - non_tcds        3.122 3.59 779  0.870  0.9769 
##  food - routines      -12.025 5.22 787 -2.302  0.2443 
##  food - conv          -11.267 4.56 785 -2.473  0.1705 
##  food - ac            -18.671 4.29 784 -4.352  0.0003 
##  food - non_tcds        1.150 4.13 784  0.279  1.0000 
##  routines - conv        0.758 4.40 772  0.172  1.0000 
##  routines - ac         -6.646 4.14 782 -1.604  0.6801 
##  routines - non_tcds   13.175 3.97 776  3.320  0.0163 
##  conv - ac             -7.404 3.27 781 -2.261  0.2645 
##  conv - non_tcds       12.417 3.05 772  4.071  0.0010 
##  ac - non_tcds         19.821 2.64 768  7.509  <.0001 
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 7 estimates
# plot: EMM (point) with its confidence limits (range) for each activity
types_emmeans_sp <- data.frame(
  emmeans(object = m2_spanish_types_all, specs = ~ activity)
)

types_sp <- ggplot(
  data = types_emmeans_sp,
  mapping = aes(x = activity, y = emmean, colour = activity)
) +
  geom_pointrange(
    mapping = aes(ymin = lower.CL, ymax = upper.CL),
    position = position_dodge(width = 0.2),
    size = 1
  ) +
  scale_x_discrete(
    labels = c("Books", "Play", "Food", "Routines",
               "Unst. Conv.", "Adult-Cent", "non-tCDS")
  ) +
  scale_color_manual(
    values = c("darkviolet", "firebrick1", "green2", "dodgerblue1",
               "darkgoldenrod1", "darkgrey", "black")
  ) +
  labs(title = "Spanish - Types (rate per min)", x = "", y = "EMM") +
  theme(
    # the colour legend is hidden; activities are labelled on the x-axis
    legend.position = "none",
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 40, vjust = .9, hjust = .9)
  )

types_sp

# model diagnostics
# NOTE(review): the original note said these plots only take fixed effects into
# account; the rendered output has a `[[2]]$id` element, which looks like a
# random-effects panel - confirm against the sjPlot documentation
plot_model(m2_spanish_types_all, type = "diag")
## [[1]]

## 
## [[2]]
## [[2]]$id

## 
## 
## [[3]]

## 
## [[4]]

MLUw x ACTIVITY - ENGLISH - ALL TALK

# comparing models
# Both models are fit with maximum likelihood (REML = FALSE) so that models
# differing in their fixed effects can be compared with a likelihood-ratio test.
# m1: child token rate as covariate, random intercept per id
m1_english_mlu_all <- lmer(mlu_w ~ tokens_hr_child +
                             (1 | id),
                           data = mlu_all_mm_en, REML = FALSE)

# m2: m1 plus the fixed effect of activity
m2_english_mlu_all <- lmer(mlu_w ~ tokens_hr_child + activity +
                             (1 | id),
                           data = mlu_all_mm_en, REML = FALSE)

# does adding the main effect of activity improve fit?
# (neither model contains an interaction term)
anova(m1_english_mlu_all, m2_english_mlu_all)
## Data: mlu_all_mm_en
## Models:
## m1_english_mlu_all: mlu_w ~ tokens_hr_child + (1 | id)
## m2_english_mlu_all: mlu_w ~ tokens_hr_child + activity + (1 | id)
##                    npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## m1_english_mlu_all    4 2444.0 2462.8 -1218.0   2436.0                         
## m2_english_mlu_all   10 2354.3 2401.1 -1167.1   2334.3 101.78  6  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# anova table for final model
# (fixed-effect F-tests; the rendered output reports Type III with Satterthwaite's method)
anova(m2_english_mlu_all)
## Type III Analysis of Variance Table with Satterthwaite's method
##                  Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## tokens_hr_child  13.439  13.439     1  44.46  13.410 0.0006624 ***
## activity        108.789  18.131     6 773.32  18.092 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# estimated marginal means (EMMs) for each activity level of the final model
m2_english_mlu_all_emmeans_activity <- emmeans(
  object = m2_english_mlu_all,
  specs = ~ activity
)

# print the EMM table (estimate, SE, df and confidence limits)
m2_english_mlu_all_emmeans_activity
##  activity emmean     SE  df lower.CL upper.CL
##  books      4.80 0.1658 548     4.48     5.13
##  play       3.53 0.1211 270     3.30     3.77
##  food       3.56 0.1480 456     3.27     3.85
##  routines   3.70 0.1464 451     3.41     3.99
##  conv       3.75 0.1095 199     3.54     3.97
##  ac         3.51 0.1010 150     3.31     3.71
##  non_tcds   4.24 0.0963 126     4.05     4.44
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
# all pairwise contrasts between the activity EMMs (Tukey-adjusted p-values, per the output)
pairs(m2_english_mlu_all_emmeans_activity)
##  contrast            estimate    SE  df t.ratio p.value
##  books - play          1.2703 0.184 792  6.905  <.0001 
##  books - food          1.2445 0.201 786  6.180  <.0001 
##  books - routines      1.1056 0.201 786  5.510  <.0001 
##  books - conv          1.0509 0.175 784  6.009  <.0001 
##  books - ac            1.2923 0.172 792  7.529  <.0001 
##  books - non_tcds      0.5601 0.168 788  3.338  0.0154 
##  play - food          -0.0258 0.168 789 -0.153  1.0000 
##  play - routines      -0.1647 0.167 787 -0.987  0.9569 
##  play - conv          -0.2194 0.134 780 -1.639  0.6569 
##  play - ac             0.0220 0.127 783  0.173  1.0000 
##  play - non_tcds      -0.7102 0.123 780 -5.757  <.0001 
##  food - routines      -0.1389 0.186 781 -0.746  0.9896 
##  food - conv          -0.1936 0.159 779 -1.221  0.8859 
##  food - ac             0.0478 0.153 782  0.312  0.9999 
##  food - non_tcds      -0.6844 0.150 780 -4.571  0.0001 
##  routines - conv      -0.0547 0.158 779 -0.347  0.9999 
##  routines - ac         0.1867 0.151 775  1.236  0.8800 
##  routines - non_tcds  -0.5456 0.148 777 -3.677  0.0047 
##  conv - ac             0.2414 0.116 775  2.078  0.3668 
##  conv - non_tcds      -0.4908 0.112 774 -4.374  0.0003 
##  ac - non_tcds        -0.7322 0.103 766 -7.094  <.0001 
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 7 estimates
# plot: EMM (point) with its confidence limits (range) for each activity
mlu_emmeans_en <- data.frame(
  emmeans(object = m2_english_mlu_all, specs = ~ activity)
)

mlu_en <- ggplot(
  data = mlu_emmeans_en,
  mapping = aes(x = activity, y = emmean, colour = activity)
) +
  geom_pointrange(
    mapping = aes(ymin = lower.CL, ymax = upper.CL),
    position = position_dodge(width = 0.2),
    size = 1
  ) +
  scale_x_discrete(
    labels = c("Books", "Play", "Food", "Routines",
               "Unst. Conv.", "Adult-Cent", "non-tCDS")
  ) +
  scale_color_manual(
    values = c("darkviolet", "firebrick1", "green2", "dodgerblue1",
               "darkgoldenrod1", "darkgrey", "black")
  ) +
  labs(title = "English - MLUw", x = "", y = "EMM") +
  theme(
    # the colour legend is hidden; activities are labelled on the x-axis
    legend.position = "none",
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 40, vjust = .9, hjust = .9)
  )

mlu_en

# model diagnostics
# NOTE(review): the original note said these plots only take fixed effects into
# account; the rendered output has a `[[2]]$id` element, which looks like a
# random-effects panel - confirm against the sjPlot documentation
plot_model(m2_english_mlu_all, type = "diag")
## [[1]]

## 
## [[2]]
## [[2]]$id

## 
## 
## [[3]]

## 
## [[4]]

MLUw x ACTIVITY - SPANISH - ALL TALK

# comparing models
# m1: baseline with the tokens_hr_child covariate and a random intercept per id
# m2: m1 plus the fixed effect of activity
# Both are fit by maximum likelihood (REML = FALSE) so the comparison below is
# a likelihood-ratio test on nested fixed-effects structures.
m1_spanish_mlu_all <- lmer(mlu_w ~ tokens_hr_child + (1 | id),
                           data = mlu_all_mm_sp, REML = FALSE)

m2_spanish_mlu_all <- lmer(mlu_w ~ tokens_hr_child + activity + (1 | id),
                           data = mlu_all_mm_sp, REML = FALSE)

# see if adding the main effect of activity improves fit
# (no interaction term is involved in either model)
anova(m1_spanish_mlu_all, m2_spanish_mlu_all)
## Data: mlu_all_mm_sp
## Models:
## m1_spanish_mlu_all: mlu_w ~ tokens_hr_child + (1 | id)
## m2_spanish_mlu_all: mlu_w ~ tokens_hr_child + activity + (1 | id)
##                    npar    AIC    BIC   logLik deviance  Chisq Df Pr(>Chisq)    
## m1_spanish_mlu_all    4 2049.0 2067.3 -1020.50   2041.0                         
## m2_spanish_mlu_all   10 1951.3 1997.2  -965.67   1931.3 109.65  6  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# anova table for final model
# type and ddf are the lmerTest defaults, written out so the call matches the
# "Type III ... Satterthwaite's method" header of the printed table
anova(m2_spanish_mlu_all, type = "III", ddf = "Satterthwaite")
## Type III Analysis of Variance Table with Satterthwaite's method
##                 Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## tokens_hr_child  6.343  6.3433     1  42.16  8.3497  0.006069 ** 
## activity        90.147 15.0246     6 696.37 19.7768 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# emmeans
# estimated marginal mean of mlu_w for each activity level of the final model;
# stored so the pairwise contrasts can reuse it, then printed
m2_spanish_mlu_all_emmeans_activity <- emmeans(
  object = m2_spanish_mlu_all,
  specs = ~ activity
)

m2_spanish_mlu_all_emmeans_activity
##  activity emmean     SE  df lower.CL upper.CL
##  books      3.75 0.1653 531     3.42     4.07
##  play       2.81 0.1218 298     2.57     3.05
##  food       2.79 0.1388 411     2.52     3.07
##  routines   3.03 0.1333 381     2.77     3.30
##  conv       2.97 0.1051 189     2.76     3.18
##  ac         2.74 0.0933 123     2.55     2.92
##  non_tcds   3.55 0.0901 108     3.37     3.73
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
# all pairwise contrasts between activity EMMs; "tukey" is the default
# adjustment for pairwise comparisons and is written out only for clarity
pairs(m2_spanish_mlu_all_emmeans_activity, adjust = "tukey")
##  contrast            estimate     SE  df t.ratio p.value
##  books - play          0.9381 0.1831 714  5.125  <.0001 
##  books - food          0.9511 0.1969 719  4.830  <.0001 
##  books - routines      0.7115 0.1924 716  3.698  0.0044 
##  books - conv          0.7746 0.1733 718  4.471  0.0002 
##  books - ac            1.0058 0.1676 722  6.003  <.0001 
##  books - non_tcds      0.1930 0.1644 718  1.174  0.9037 
##  play - food           0.0130 0.1606 707  0.081  1.0000 
##  play - routines      -0.2267 0.1550 700 -1.463  0.7668 
##  play - conv          -0.1635 0.1317 701 -1.242  0.8775 
##  play - ac             0.0677 0.1229 705  0.551  0.9980 
##  play - non_tcds      -0.7451 0.1201 702 -6.203  <.0001 
##  food - routines      -0.2396 0.1691 704 -1.417  0.7927 
##  food - conv          -0.1765 0.1473 703 -1.198  0.8949 
##  food - ac             0.0547 0.1389 702  0.394  0.9997 
##  food - non_tcds      -0.7581 0.1370 702 -5.534  <.0001 
##  routines - conv       0.0631 0.1416 695  0.446  0.9994 
##  routines - ac         0.2944 0.1341 701  2.194  0.2998 
##  routines - non_tcds  -0.5185 0.1317 699 -3.937  0.0018 
##  conv - ac             0.2312 0.1061 701  2.180  0.3078 
##  conv - non_tcds      -0.5816 0.1029 696 -5.652  <.0001 
##  ac - non_tcds        -0.8128 0.0905 694 -8.979  <.0001 
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 7 estimates
# plot: estimated marginal means (EMMs) of MLUw per activity, Spanish
# emmeans() output is coerced to a data frame so ggplot can use its
# emmean / lower.CL / upper.CL columns.
mlu_emmeans_sp <- data.frame(emmeans(m2_spanish_mlu_all, ~ activity))

# Colours and axis labels are keyed by activity level name (not by position),
# so a change in factor level order cannot silently mislabel a point.
mlu_sp <- ggplot(mlu_emmeans_sp, aes(activity, emmean, colour = activity)) +
  geom_pointrange(aes(ymin = lower.CL, ymax = upper.CL),
                  position = position_dodge(width = 0.2),
                  size = 1) +
  scale_color_manual(values = c(books = "darkviolet",
                                play = "firebrick1",
                                food = "green2",
                                routines = "dodgerblue1",
                                conv = "darkgoldenrod1",
                                ac = "darkgrey",
                                non_tcds = "black")) +
  theme(legend.position = "none",
        text = element_text(size = 20),
        axis.text.x = element_text(angle = 40, vjust = 0.9, hjust = 0.9)) +
  scale_x_discrete(labels = c(books = "Books",
                              play = "Play",
                              food = "Food",
                              routines = "Routines",
                              conv = "Unst. Conv.",
                              ac = "Adult-Cent",
                              non_tcds = "non-tCDS")) +
  labs(x = "", y = "EMM", title = "Spanish - MLUw")

mlu_sp

# model diagnostics
# plot_model(type = "diag") returns a list of diagnostic plots. The printed
# list has an `id` entry under [[2]], i.e. one panel per grouping factor, so
# the random intercepts appear to be checked as well.
# NOTE(review): an earlier note here said "fixed effects only" - confirm
# against the sjPlot documentation.
plot_model(m2_spanish_mlu_all, type = "diag")
## [[1]]

## 
## [[2]]
## [[2]]$id

## 
## 
## [[3]]

## 
## [[4]]

PROP RESPONSES x ACTIVITY - ENGLISH - ALL TALK

# comparing models
# m1: baseline with the tokens_hr_child covariate and a random intercept per id
# m2: m1 plus the fixed effect of activity
# Both are fit by maximum likelihood (REML = FALSE) so the comparison below is
# a likelihood-ratio test on nested fixed-effects structures.
m1_english_propresp_all <- lmer(
  prop_adultresp_outof_childutt ~ tokens_hr_child + (1 | id),
  data = chip_mm_en, REML = FALSE
)

m2_english_propresp_all <- lmer(
  prop_adultresp_outof_childutt ~ tokens_hr_child + activity + (1 | id),
  data = chip_mm_en, REML = FALSE
)

# see if adding the main effect of activity improves fit
# (no interaction term is involved in either model)
anova(m1_english_propresp_all, m2_english_propresp_all)
## Data: chip_mm_en
## Models:
## m1_english_propresp_all: prop_adultresp_outof_childutt ~ tokens_hr_child + (1 | id)
## m2_english_propresp_all: prop_adultresp_outof_childutt ~ tokens_hr_child + activity + 
## m2_english_propresp_all:     (1 | id)
##                         npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)   
## m1_english_propresp_all    4 1368.4 1385.6 -680.20   1360.4                        
## m2_english_propresp_all    9 1359.0 1397.7 -670.51   1341.0 19.387  5   0.001628 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# anova table for final model
# type and ddf are the lmerTest defaults, written out so the call matches the
# "Type III ... Satterthwaite's method" header of the printed table
anova(m2_english_propresp_all, type = "III", ddf = "Satterthwaite")
## Type III Analysis of Variance Table with Satterthwaite's method
##                 Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## tokens_hr_child 11.782 11.7816     1  48.74 17.5775 0.0001158 ***
## activity        13.260  2.6519     5 540.72  3.9565 0.0015620 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# emmeans
# estimated marginal mean of prop_adultresp_outof_childutt for each activity
# level of the final model; stored so the pairwise contrasts can reuse it,
# then printed
m2_english_propresp_all_emmeans_activity <- emmeans(
  object = m2_english_propresp_all,
  specs = ~ activity
)

m2_english_propresp_all_emmeans_activity
##  activity emmean     SE  df lower.CL upper.CL
##  books      2.44 0.1259 465     2.19     2.69
##  play       2.02 0.0870 334     1.85     2.19
##  food       1.89 0.1127 448     1.67     2.11
##  routines   1.86 0.1132 463     1.64     2.08
##  conv       1.91 0.0775 289     1.75     2.06
##  ac         1.86 0.0736 251     1.72     2.01
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
# all pairwise contrasts between activity EMMs; "tukey" is the default
# adjustment for pairwise comparisons and is written out only for clarity
pairs(m2_english_propresp_all_emmeans_activity, adjust = "tukey")
##  contrast         estimate    SE  df t.ratio p.value
##  books - play      0.41950 0.149 548  2.808  0.0576 
##  books - food      0.55247 0.165 550  3.355  0.0109 
##  books - routines  0.58229 0.166 550  3.517  0.0063 
##  books - conv      0.53418 0.143 549  3.737  0.0028 
##  books - ac        0.57586 0.142 546  4.046  0.0008 
##  play - food       0.13298 0.139 550  0.959  0.9305 
##  play - routines   0.16280 0.139 550  1.171  0.8504 
##  play - conv       0.11469 0.111 546  1.035  0.9061 
##  play - ac         0.15636 0.108 547  1.448  0.6976 
##  food - routines   0.02982 0.156 550  0.191  1.0000 
##  food - conv      -0.01829 0.132 547 -0.139  1.0000 
##  food - ac         0.02339 0.130 550  0.180  1.0000 
##  routines - conv  -0.04811 0.132 547 -0.363  0.9992 
##  routines - ac    -0.00643 0.130 546 -0.049  1.0000 
##  conv - ac         0.04168 0.101 545  0.414  0.9984 
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 6 estimates
# plot: estimated marginal means (EMMs) of the response measure per activity,
# English. emmeans() output is coerced to a data frame so ggplot can use its
# emmean / lower.CL / upper.CL columns.
propresp_emmeans_en <- data.frame(emmeans(m2_english_propresp_all, ~ activity))

# Colours and axis labels are keyed by activity level name (not by position),
# so a change in factor level order cannot silently mislabel a point.
resp_en <- ggplot(propresp_emmeans_en,
                  aes(activity, emmean, colour = activity)) +
  geom_pointrange(aes(ymin = lower.CL, ymax = upper.CL),
                  position = position_dodge(width = 0.2),
                  size = 1) +
  scale_color_manual(values = c(books = "darkviolet",
                                play = "firebrick1",
                                food = "green2",
                                routines = "dodgerblue1",
                                conv = "darkgoldenrod1",
                                ac = "darkgrey")) +
  theme(legend.position = "none",
        text = element_text(size = 20),
        axis.text.x = element_text(angle = 40, vjust = 0.9, hjust = 0.9)) +
  scale_x_discrete(labels = c(books = "Books",
                              play = "Play",
                              food = "Food",
                              routines = "Routines",
                              conv = "Unst. Conv.",
                              ac = "Adult-Cent")) +
  labs(x = "", y = "EMM", title = "English - Proportion of Responses")


resp_en

# model diagnostics
# plot_model(type = "diag") returns a list of diagnostic plots. The printed
# list has an `id` entry under [[2]], i.e. one panel per grouping factor, so
# the random intercepts appear to be checked as well.
# NOTE(review): an earlier note here said "fixed effects only" - confirm
# against the sjPlot documentation.
plot_model(m2_english_propresp_all, type = "diag")
## [[1]]

## 
## [[2]]
## [[2]]$id

## 
## 
## [[3]]

## 
## [[4]]

PROP RESPONSES x ACTIVITY - SPANISH - ALL TALK

# comparing models
# m1: baseline with the tokens_hr_child covariate and a random intercept per id
# m2: m1 plus the fixed effect of activity
# Both are fit by maximum likelihood (REML = FALSE) so the comparison below is
# a likelihood-ratio test on nested fixed-effects structures.
m1_spanish_propresp_all <- lmer(
  prop_adultresp_outof_childutt ~ tokens_hr_child + (1 | id),
  data = chip_mm_sp, REML = FALSE
)

m2_spanish_propresp_all <- lmer(
  prop_adultresp_outof_childutt ~ tokens_hr_child + activity + (1 | id),
  data = chip_mm_sp, REML = FALSE
)

# see if adding the main effect of activity improves fit
# (no interaction term is involved in either model)
anova(m1_spanish_propresp_all, m2_spanish_propresp_all)
## Data: chip_mm_sp
## Models:
## m1_spanish_propresp_all: prop_adultresp_outof_childutt ~ tokens_hr_child + (1 | id)
## m2_spanish_propresp_all: prop_adultresp_outof_childutt ~ tokens_hr_child + activity + 
## m2_spanish_propresp_all:     (1 | id)
##                         npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## m1_spanish_propresp_all    4 1060.9 1077.5 -526.43   1052.9                         
## m2_spanish_propresp_all    9 1045.0 1082.5 -513.52   1027.0 25.837  5  9.596e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# anova table for final model
# type and ddf are the lmerTest defaults, written out so the call matches the
# "Type III ... Satterthwaite's method" header of the printed table
anova(m2_spanish_propresp_all, type = "III", ddf = "Satterthwaite")
## Type III Analysis of Variance Table with Satterthwaite's method
##                  Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## tokens_hr_child  4.6205  4.6205     1  38.88 10.1463  0.002848 ** 
## activity        12.1288  2.4258     5 450.48  5.3268 9.077e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# emmeans
# estimated marginal mean of prop_adultresp_outof_childutt for each activity
# level of the final model; stored so the pairwise contrasts can reuse it,
# then printed
m2_spanish_propresp_all_emmeans_activity <- emmeans(
  object = m2_spanish_propresp_all,
  specs = ~ activity
)

m2_spanish_propresp_all_emmeans_activity
##  activity emmean     SE  df lower.CL upper.CL
##  books      2.15 0.1298 379     1.90     2.41
##  play       1.90 0.0982 258     1.70     2.09
##  food       1.78 0.1091 318     1.56     1.99
##  routines   1.94 0.1064 314     1.74     2.15
##  conv       1.91 0.0839 170     1.74     2.07
##  ac         1.60 0.0753 118     1.45     1.75
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
# all pairwise contrasts between activity EMMs; "tukey" is the default
# adjustment for pairwise comparisons and is written out only for clarity
pairs(m2_spanish_propresp_all_emmeans_activity, adjust = "tukey")
##  contrast         estimate     SE  df t.ratio p.value
##  books - play       0.2566 0.1458 465  1.761  0.4925 
##  books - food       0.3762 0.1564 474  2.406  0.1563 
##  books - routines   0.2092 0.1532 468  1.365  0.7477 
##  books - conv       0.2449 0.1376 469  1.780  0.4798 
##  books - ac         0.5520 0.1342 475  4.112  0.0007 
##  play - food        0.1195 0.1294 461  0.924  0.9403 
##  play - routines   -0.0475 0.1261 452 -0.376  0.9990 
##  play - conv       -0.0117 0.1082 454 -0.108  1.0000 
##  play - ac          0.2954 0.1023 460  2.888  0.0464 
##  food - routines   -0.1670 0.1352 457 -1.235  0.8195 
##  food - conv       -0.1312 0.1182 458 -1.110  0.8773 
##  food - ac          0.1758 0.1121 457  1.569  0.6192 
##  routines - conv    0.0358 0.1143 443  0.313  0.9996 
##  routines - ac      0.3428 0.1098 452  3.124  0.0232 
##  conv - ac          0.3071 0.0882 455  3.481  0.0072 
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 6 estimates
# plot: estimated marginal means (EMMs) of the response measure per activity,
# Spanish. emmeans() output is coerced to a data frame so ggplot can use its
# emmean / lower.CL / upper.CL columns.
propresp_emmeans_sp <- data.frame(emmeans(m2_spanish_propresp_all, ~ activity))

# Colours and axis labels are keyed by activity level name (not by position),
# so a change in factor level order cannot silently mislabel a point.
resp_sp <- ggplot(propresp_emmeans_sp,
                  aes(activity, emmean, colour = activity)) +
  geom_pointrange(aes(ymin = lower.CL, ymax = upper.CL),
                  position = position_dodge(width = 0.2),
                  size = 1) +
  scale_color_manual(values = c(books = "darkviolet",
                                play = "firebrick1",
                                food = "green2",
                                routines = "dodgerblue1",
                                conv = "darkgoldenrod1",
                                ac = "darkgrey")) +
  theme(legend.position = "none",
        text = element_text(size = 20),
        axis.text.x = element_text(angle = 40, vjust = 0.9, hjust = 0.9)) +
  scale_x_discrete(labels = c(books = "Books",
                              play = "Play",
                              food = "Food",
                              routines = "Routines",
                              conv = "Unst. Conv.",
                              ac = "Adult-Cent")) +
  labs(x = "", y = "EMM", title = "Spanish - Proportion of Responses")


resp_sp

# model diagnostics
# plot_model(type = "diag") returns a list of diagnostic plots. The printed
# list has an `id` entry under [[2]], i.e. one panel per grouping factor, so
# the random intercepts appear to be checked as well.
# NOTE(review): an earlier note here said "fixed effects only" - confirm
# against the sjPlot documentation.
plot_model(m2_spanish_propresp_all, type = "diag")
## [[1]]

## 
## [[2]]
## [[2]]$id

## 
## 
## [[3]]

## 
## [[4]]

PROP IMIT/EXP x ACTIVITY - ENGLISH - ALL TALK; no sig main effect of activity

# comparing models
# m1: baseline with the tokens_hr_child covariate and a random intercept per id
# m2: m1 plus the fixed effect of activity
# Both are fit by maximum likelihood (REML = FALSE) so the comparison below is
# a likelihood-ratio test on nested fixed-effects structures.
m1_english_propimitexp_all <- lmer(
  prop_adult_imitexp_outof_childutt ~ tokens_hr_child + (1 | id),
  data = chip_mm_en, REML = FALSE
)

m2_english_propimitexp_all <- lmer(
  prop_adult_imitexp_outof_childutt ~ tokens_hr_child + activity + (1 | id),
  data = chip_mm_en, REML = FALSE
)

# see if adding the main effect of activity improves fit
# (no interaction term is involved in either model)
anova(m1_english_propimitexp_all, m2_english_propimitexp_all)
## Data: chip_mm_en
## Models:
## m1_english_propimitexp_all: prop_adult_imitexp_outof_childutt ~ tokens_hr_child + (1 | id)
## m2_english_propimitexp_all: prop_adult_imitexp_outof_childutt ~ tokens_hr_child + activity + 
## m2_english_propimitexp_all:     (1 | id)
##                            npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## m1_english_propimitexp_all    4 430.04 447.23 -211.02   422.04                     
## m2_english_propimitexp_all    9 431.88 470.56 -206.94   413.88 8.1564  5     0.1478
# anova table for final model
# type and ddf are the lmerTest defaults, written out so the call matches the
# "Type III ... Satterthwaite's method" header of the printed table
anova(m2_english_propimitexp_all, type = "III", ddf = "Satterthwaite")
## Type III Analysis of Variance Table with Satterthwaite's method
##                  Sum Sq Mean Sq NumDF  DenDF F value Pr(>F)
## tokens_hr_child 0.15738 0.15738     1  42.25  1.2786 0.2645
## activity        1.01328 0.20266     5 540.18  1.6465 0.1459
# emmeans
# estimated marginal mean of prop_adult_imitexp_outof_childutt for each
# activity level of the model that includes activity; stored, then printed
m2_english_propimitexp_all_emmeans_activity <- emmeans(
  object = m2_english_propimitexp_all,
  specs = ~ activity
)

m2_english_propimitexp_all_emmeans_activity
##  activity emmean     SE  df lower.CL upper.CL
##  books     0.505 0.0532 468    0.400    0.609
##  play      0.409 0.0365 355    0.337    0.481
##  food      0.360 0.0476 456    0.267    0.454
##  routines  0.353 0.0478 473    0.259    0.447
##  conv      0.385 0.0324 317    0.321    0.449
##  ac        0.347 0.0307 276    0.286    0.407
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
# plot: estimated marginal means (EMMs) of the imitation/expansion measure per
# activity, English. emmeans() output is coerced to a data frame so ggplot can
# use its emmean / lower.CL / upper.CL columns.
propimitexp_emmeans_en <- data.frame(
  emmeans(m2_english_propimitexp_all, ~ activity)
)

# Colours and axis labels are keyed by activity level name (not by position),
# so a change in factor level order cannot silently mislabel a point.
imit_exp_en <- ggplot(propimitexp_emmeans_en,
                      aes(activity, emmean, colour = activity)) +
  geom_pointrange(aes(ymin = lower.CL, ymax = upper.CL),
                  position = position_dodge(width = 0.2),
                  size = 1) +
  scale_color_manual(values = c(books = "darkviolet",
                                play = "firebrick1",
                                food = "green2",
                                routines = "dodgerblue1",
                                conv = "darkgoldenrod1",
                                ac = "darkgrey")) +
  theme(legend.position = "none",
        text = element_text(size = 20),
        axis.text.x = element_text(angle = 40, vjust = 0.9, hjust = 0.9)) +
  scale_x_discrete(labels = c(books = "Books",
                              play = "Play",
                              food = "Food",
                              routines = "Routines",
                              conv = "Unst. Conv.",
                              ac = "Adult-Cent")) +
  labs(x = "", y = "EMM",
       title = "English - Proportion of Imitations/Expansions")

imit_exp_en

# model diagnostics
# plot_model(type = "diag") returns a list of diagnostic plots. The printed
# list has an `id` entry under [[2]], i.e. one panel per grouping factor, so
# the random intercepts appear to be checked as well.
# NOTE(review): an earlier note here said "fixed effects only" - confirm
# against the sjPlot documentation.
plot_model(m2_english_propimitexp_all, type = "diag")
## [[1]]

## 
## [[2]]
## [[2]]$id

## 
## 
## [[3]]

## 
## [[4]]

PROP IMIT/EXP x ACTIVITY - SPANISH - ALL TALK; no sig main effect of activity

# comparing models
# m1: baseline with the tokens_hr_child covariate and a random intercept per id
# m2: m1 plus the fixed effect of activity
# Both are fit by maximum likelihood (REML = FALSE) so the comparison below is
# a likelihood-ratio test on nested fixed-effects structures.
m1_spanish_propimitexp_all <- lmer(
  prop_adult_imitexp_outof_childutt ~ tokens_hr_child + (1 | id),
  data = chip_mm_sp, REML = FALSE
)

m2_spanish_propimitexp_all <- lmer(
  prop_adult_imitexp_outof_childutt ~ tokens_hr_child + activity + (1 | id),
  data = chip_mm_sp, REML = FALSE
)

# see if adding the main effect of activity improves fit
# (no interaction term is involved in either model)
anova(m1_spanish_propimitexp_all, m2_spanish_propimitexp_all)
## Data: chip_mm_sp
## Models:
## m1_spanish_propimitexp_all: prop_adult_imitexp_outof_childutt ~ tokens_hr_child + (1 | id)
## m2_spanish_propimitexp_all: prop_adult_imitexp_outof_childutt ~ tokens_hr_child + activity + 
## m2_spanish_propimitexp_all:     (1 | id)
##                            npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## m1_spanish_propimitexp_all    4 268.52 285.17 -130.26   260.52                     
## m2_spanish_propimitexp_all    9 271.29 308.74 -126.64   253.29 7.2343  5     0.2038
# anova table for final model
# type and ddf are the lmerTest defaults, written out so the call matches the
# "Type III ... Satterthwaite's method" header of the printed table
anova(m2_spanish_propimitexp_all, type = "III", ddf = "Satterthwaite")
## Type III Analysis of Variance Table with Satterthwaite's method
##                  Sum Sq  Mean Sq NumDF  DenDF F value Pr(>F)
## tokens_hr_child 0.03168 0.031682     1  46.44  0.3457 0.5594
## activity        0.67092 0.134185     5 459.82  1.4642 0.2002
# emmeans
# estimated marginal mean of prop_adult_imitexp_outof_childutt for each
# activity level of the model that includes activity; stored, then printed
m2_spanish_propimitexp_all_emmeans_activity <- emmeans(
  object = m2_spanish_propimitexp_all,
  specs = ~ activity
)

m2_spanish_propimitexp_all_emmeans_activity
##  activity emmean     SE  df lower.CL upper.CL
##  books     0.376 0.0561 400    0.265    0.486
##  play      0.393 0.0417 307    0.311    0.475
##  food      0.343 0.0468 362    0.251    0.435
##  routines  0.442 0.0456 366    0.352    0.531
##  conv      0.376 0.0349 209    0.308    0.445
##  ac        0.321 0.0308 142    0.261    0.382
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
# plot: estimated marginal means (EMMs) of the imitation/expansion measure per
# activity, Spanish. emmeans() output is coerced to a data frame so ggplot can
# use its emmean / lower.CL / upper.CL columns.
propimitexp_emmeans_sp <- data.frame(
  emmeans(m2_spanish_propimitexp_all, ~ activity)
)

# Colours and axis labels are keyed by activity level name (not by position),
# so a change in factor level order cannot silently mislabel a point.
imit_exp_sp <- ggplot(propimitexp_emmeans_sp,
                      aes(activity, emmean, colour = activity)) +
  geom_pointrange(aes(ymin = lower.CL, ymax = upper.CL),
                  position = position_dodge(width = 0.2),
                  size = 1) +
  scale_color_manual(values = c(books = "darkviolet",
                                play = "firebrick1",
                                food = "green2",
                                routines = "dodgerblue1",
                                conv = "darkgoldenrod1",
                                ac = "darkgrey")) +
  theme(legend.position = "none",
        text = element_text(size = 20),
        axis.text.x = element_text(angle = 40, vjust = 0.9, hjust = 0.9)) +
  scale_x_discrete(labels = c(books = "Books",
                              play = "Play",
                              food = "Food",
                              routines = "Routines",
                              conv = "Unst. Conv.",
                              ac = "Adult-Cent")) +
  labs(x = "", y = "EMM",
       title = "Spanish - Proportion of Imitations/Expansions")

imit_exp_sp

# model diagnostics
# plot_model(type = "diag") returns a list of diagnostic plots. The printed
# list has an `id` entry under [[2]], i.e. one panel per grouping factor, so
# the random intercepts appear to be checked as well.
# NOTE(review): an earlier note here said "fixed effects only" - confirm
# against the sjPlot documentation.
plot_model(m2_spanish_propimitexp_all, type = "diag")
## [[1]]

## 
## [[2]]
## [[2]]$id

## 
## 
## [[3]]

## 
## [[4]]

Combining plots

# grid all
# One grid per language, each arranging that language's five EMM plots.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
ggarrange(tokens_en, types_en, mlu_en, resp_en, imit_exp_en,
          common.legend = TRUE, legend = "bottom")

ggarrange(tokens_sp, types_sp, mlu_sp, resp_sp, imit_exp_sp,
          common.legend = TRUE, legend = "bottom")